library(tidyverse)
library(plotly)
library(sf)
library(mapview)
library(tigris)
library(censusapi)
library(leaflet)
library(lehdr)


options(
  tigris_class = "sf",
  tigris_use_cache = TRUE
)

Sys.setenv(CENSUS_KEY="10dcd73d7c043e91bac9fb8d3989cbff54b08790")

Setting up the data

# load demographic data
sj_dem_by_block <- readRDS("/Users/simonespeizer/Documents/2020 Spring Quarter/CEE 218Z/covid19/Cameron_Tenner/sj_socialdistancing_demdata_02.rds") %>% 
  mutate(`ami_% over 75,000` = (100 - `ami_% under 75,000`), `ami_% over 100,000` = (100 - `ami_% under 100,000`), `% not speaking spanish` = (100 - `% speaking spanish`), `log(% speaking english well)` = log((100-exp(`log(% speaking english < well)`)))) %>% 
  dplyr::select(-`% leaving home`) # select all but social distancing data because I am going to process it differently than has been done in this


# load the social distancing data
# get San Jose block groups
scc_blockgroups <- block_groups("CA","Santa Clara", cb=F, progress_bar=F)

# Use tracts sent to us by San Jose
sj_tracts <- st_read("/Users/simonespeizer/pCloud Drive/Shared/SFBI/Data Library/San_Jose/CSJ_Census_Tracts/CSJ_Census_Tracts.shp") %>%
  st_as_sf() %>%
  st_transform(st_crs(scc_blockgroups))
## Reading layer `CSJ_Census_Tracts' from data source `/Users/simonespeizer/pCloud Drive/Shared/SFBI/Data Library/San_Jose/CSJ_Census_Tracts/CSJ_Census_Tracts.shp' using driver `ESRI Shapefile'
## Simple feature collection with 219 features and 9 fields
## geometry type:  MULTIPOLYGON
## dimension:      XY
## bbox:           xmin: 6112856 ymin: 1869687 xmax: 6255982 ymax: 1996555
## epsg (SRID):    2227
## proj4string:    +proj=lcc +lat_1=38.43333333333333 +lat_2=37.06666666666667 +lat_0=36.5 +lon_0=-120.5 +x_0=2000000.0001016 +y_0=500000.0001016001 +ellps=GRS80 +towgs84=0,0,0,0,0,0,0 +units=us-ft +no_defs
sj_citycouncil_disticts <- st_read("/Users/simonespeizer/pCloud Drive/Shared/SFBI/Data Library/San_Jose/City Council Districts/CITY_COUNCIL_DISTRICTS.shp") %>%
  st_as_sf() %>%
  st_transform(st_crs(scc_blockgroups))
## Reading layer `CITY_COUNCIL_DISTRICTS' from data source `/Users/simonespeizer/pCloud Drive/Shared/SFBI/Data Library/San_Jose/City Council Districts/CITY_COUNCIL_DISTRICTS.shp' using driver `ESRI Shapefile'
## Simple feature collection with 10 features and 7 fields
## geometry type:  POLYGON
## dimension:      XY
## bbox:           xmin: 6112856 ymin: 1869687 xmax: 6255982 ymax: 1996555
## epsg (SRID):    2227
## proj4string:    +proj=lcc +lat_1=38.43333333333333 +lat_2=37.06666666666667 +lat_0=36.5 +lon_0=-120.5 +x_0=2000000.0001016 +y_0=500000.0001016001 +ellps=GRS80 +towgs84=0,0,0,0,0,0,0 +units=us-ft +no_defs
# from code written by others to get SJ blockgroups
sj_blockgroups <-
  scc_blockgroups %>%
  st_centroid() %>%
  st_join(sj_tracts, left = F) %>%
  st_join(sj_citycouncil_disticts%>% dplyr::select(DISTRICTS)) %>%
  mutate(
    DISTRICTS = DISTRICTS %>% factor(levels = c("1","2","3","4","5","6","7","8","9","10"))
  ) %>%
  st_set_geometry(NULL) %>%
  left_join(scc_blockgroups%>% dplyr::select(GEOID), by = "GEOID") %>%
  st_as_sf() %>%
  dplyr::select(GEOID, DISTRICTS)

# the spatial join leaves off two blockgroups which are touching district 9. The following code assigns those to district 9
sj_blockgroups$DISTRICTS[is.na(sj_blockgroups$DISTRICTS)] <- 9

# code from others in the class to get social distancing data 
sj_socialdistancing <- readRDS("/Users/simonespeizer/pCloud Drive/Shared/SFBI/Restricted Data Library/Safegraph/covid19analysis/sj_socialdistancing.rds") %>% 
  mutate(date = date_range_start %>%  substr(1,10) %>% as.Date()) %>% 
  left_join(sj_blockgroups, by = c("origin_census_block_group" = "GEOID")) %>% 
  filter(!is.na(DISTRICTS))

# obtaining weekends
weekends <-
  sj_socialdistancing %>% 
  filter(!duplicated(date)) %>% 
  arrange(date) %>% 
  mutate(
    weekend = 
      ifelse(
        (date %>% as.numeric()) %% 7 %in% c(2,3),
        T,
        F
      )
  ) %>% 
  dplyr::select(date,weekend)

sj_socialdistancing <- 
  sj_socialdistancing %>% 
  left_join(weekends)

# date of the shelter in place order
shelter_start <- "2020-03-16" %>% as.Date()

# get average staying at home on weekdays in January and February, and add trendlines and add column indicating before or after shelter in place
sj_pre_sd_at_home_average <- sj_socialdistancing %>% 
  filter(weekend == F) %>% 
  filter(date <  as.Date("2020-03-01")) %>%
  group_by(origin_census_block_group) %>% 
  summarize(completely_home_device_count = sum(completely_home_device_count), device_count = sum(device_count)) %>% 
  mutate(`% Completely at Home` = (completely_home_device_count/device_count*100) %>% round(1), `% leaving home` = (100 - `% Completely at Home`)) %>%
  left_join(sj_dem_by_block, by = c("origin_census_block_group" = "blockgroup")) %>% 
  filter(!is.na(district))

sj_pre_sd_at_home_average[is.na(sj_pre_sd_at_home_average)] <- 0

sj_pre_sd_at_home_average <- sj_pre_sd_at_home_average %>%
  mutate(
    ami_trendline =
      fitted(lm(sj_pre_sd_at_home_average$`% leaving home` ~ sj_pre_sd_at_home_average$`ami_% over 100,000`)),
    income_trendline = fitted(lm((sj_pre_sd_at_home_average)$`% leaving home` ~ (sj_pre_sd_at_home_average)$median_income)),
    young_trendline = fitted(lm(sj_pre_sd_at_home_average$`% leaving home` ~ sj_pre_sd_at_home_average$`percent less than 30`)),
    elderly_trendline = fitted(lm(sj_pre_sd_at_home_average$`% leaving home` ~ sj_pre_sd_at_home_average$`percent elderly`)), 
    spanish_trendline = fitted(lm(sj_pre_sd_at_home_average$`% leaving home` ~ sj_pre_sd_at_home_average$`% not speaking spanish`)),
    english_trendline = fitted(lm(sj_pre_sd_at_home_average$`% leaving home` ~ sj_pre_sd_at_home_average$`log(% speaking english well)`)),
    api_trendline = fitted(lm(sj_pre_sd_at_home_average$`% leaving home` ~ sj_pre_sd_at_home_average$`% speaking api`))) %>%
  cbind(`Before or After Shelter-in-Place` = "Before shelter-in-place")

# same for since shelter in place
sj_post_sd_at_home_average <- sj_socialdistancing %>% 
  filter(weekend == F) %>% 
  filter(date >  shelter_start) %>%
  group_by(origin_census_block_group) %>% 
  summarize(completely_home_device_count = sum(completely_home_device_count), device_count = sum(device_count)) %>% 
  mutate(`% Completely at Home` = (completely_home_device_count/device_count*100) %>% round(1), `% leaving home` = (100 - `% Completely at Home`)) %>%
  left_join(sj_dem_by_block, by = c("origin_census_block_group" = "blockgroup")) %>%
  filter(!is.na(district))

sj_post_sd_at_home_average[is.na(sj_post_sd_at_home_average)] <- 0

sj_post_sd_at_home_average <- sj_post_sd_at_home_average %>%
  mutate(
    ami_trendline =
      fitted(lm(sj_post_sd_at_home_average$`% leaving home` ~ sj_post_sd_at_home_average$`ami_% over 100,000`)),
    income_trendline = fitted(lm((sj_post_sd_at_home_average)$`% leaving home` ~ (sj_post_sd_at_home_average)$median_income)),
    young_trendline = fitted(lm(sj_post_sd_at_home_average$`% leaving home` ~ sj_post_sd_at_home_average$`percent less than 30`)),
    elderly_trendline = fitted(lm(sj_post_sd_at_home_average$`% leaving home` ~ sj_post_sd_at_home_average$`percent elderly`)), 
    spanish_trendline = fitted(lm(sj_post_sd_at_home_average$`% leaving home` ~ sj_post_sd_at_home_average$`% not speaking spanish`)),
    english_trendline = fitted(lm(sj_post_sd_at_home_average$`% leaving home` ~ sj_post_sd_at_home_average$`log(% speaking english well)`)),
    api_trendline = fitted(lm(sj_post_sd_at_home_average$`% leaving home` ~ sj_post_sd_at_home_average$`% speaking api`))) %>%
  cbind(`Before or After Shelter-in-Place` = "After shelter-in-place")

sj_dem_distancing_combined <- rbind(sj_pre_sd_at_home_average, sj_post_sd_at_home_average)

# convert the before/after column to factor
sj_dem_distancing_combined$`Before or After Shelter-in-Place` <- factor(sj_dem_distancing_combined$`Before or After Shelter-in-Place`, levels = c("Before shelter-in-place", "After shelter-in-place"))

saveRDS(sj_dem_distancing_combined, "/Users/simonespeizer/Documents/2020 Spring Quarter/CEE 218Z/covid19/Simone_Speizer/sj_socialdistancing_demdata_prepost.rds")

Now make plots.

AMI

fig_ami <- 
  plot_ly (sj_dem_distancing_combined) %>%
    add_trace(
      x = ~`ami_% over 100,000`, 
      y = ~`% leaving home`, 
      frame = ~`Before or After Shelter-in-Place`, 
      type = 'scatter', 
      mode = 'markers', 
      showlegend = F
    ) %>% 
    add_trace(
      x = ~`ami_% over 100,000`,
      y = ~ami_trendline,
      type = 'scatter',
      mode = 'lines',
      line = list(size = 5, color = 'rgba(255, 165, 0, 1)'),
      frame = ~`Before or After Shelter-in-Place`,
      showlegend = F
    ) %>% 
  animation_button(visible = F) %>%
  layout(xaxis = list(title = '% of households making over $100,000'), yaxis = list(title = '% leaving home'))

fig_ami

Median income

fig_income <- 
  plot_ly (sj_dem_distancing_combined %>% filter(median_income > 0)) %>%
    add_trace(
      x = ~median_income, 
      y = ~`% leaving home`, 
      frame = ~`Before or After Shelter-in-Place`, 
      type = 'scatter', 
      mode = 'markers', 
      showlegend = F
    ) %>% 
    add_trace(
      x = ~median_income,
      y = ~income_trendline,
      type = 'scatter',
      mode = 'lines',
      line = list(size = 5, color = 'rgba(255, 165, 0, 1)'),
      frame = ~`Before or After Shelter-in-Place`,
      showlegend = F
    ) %>% 
  animation_button(visible = F) %>%
  layout(xaxis = list(title = 'median income'), yaxis = list(title = '% leaving home'))

fig_income

Speaking English

fig_english <- 
  plot_ly (sj_dem_distancing_combined) %>%
    add_trace(
      x = ~`log(% speaking english well)`, 
      y = ~`% leaving home`, 
      frame = ~`Before or After Shelter-in-Place`, 
      type = 'scatter', 
      mode = 'markers', 
      showlegend = F
    ) %>% 
    add_trace(
      x = ~`log(% speaking english well)`,
      y = ~english_trendline,
      type = 'scatter',
      mode = 'lines',
      line = list(size = 5, color = 'rgba(255, 165, 0, 1)'),
      frame = ~`Before or After Shelter-in-Place`,
      showlegend = F
    ) %>% 
  animation_button(visible = F) %>%
  layout(xaxis = list(title = '% speaking English well'), yaxis = list(title = '% leaving home'))

fig_english

Speaking Spanish

fig_spanish <- 
  plot_ly (sj_dem_distancing_combined) %>%
    add_trace(
      x = ~`% not speaking spanish`, 
      y = ~`% leaving home`, 
      frame = ~`Before or After Shelter-in-Place`, 
      type = 'scatter', 
      mode = 'markers', 
      showlegend = F
    ) %>% 
    add_trace(
      x = ~`% not speaking spanish`,
      y = ~spanish_trendline,
      type = 'scatter',
      mode = 'lines',
      line = list(size = 5, color = 'rgba(255, 165, 0, 1)'),
      frame = ~`Before or After Shelter-in-Place`,
      showlegend = F
    ) %>% 
  animation_button(visible = F) %>%
  layout(xaxis = list(title = '% NOT speaking Spanish'), yaxis = list(title = '% leaving home'))

fig_spanish

Speaking API

fig_api <- 
  plot_ly (sj_dem_distancing_combined) %>%
    add_trace(
      x = ~`% speaking api`, 
      y = ~`% leaving home`, 
      frame = ~`Before or After Shelter-in-Place`, 
      type = 'scatter', 
      mode = 'markers', 
      showlegend = F
    ) %>% 
    add_trace(
      x = ~`% speaking api`,
      y = ~api_trendline,
      type = 'scatter',
      mode = 'lines',
      line = list(size = 5, color = 'rgba(255, 165, 0, 1)'),
      frame = ~`Before or After Shelter-in-Place`,
      showlegend = F
    ) %>% 
  animation_button(visible = F) %>%
  layout(xaxis = list(title = '% speaking API'), yaxis = list(title = '% leaving home'))

fig_api

Elderly population

fig_elderly <- 
  plot_ly (sj_dem_distancing_combined) %>%
    add_trace(
      x = ~`percent elderly`, 
      y = ~`% leaving home`, 
      frame = ~`Before or After Shelter-in-Place`, 
      type = 'scatter', 
      mode = 'markers', 
      showlegend = F
    ) %>% 
    add_trace(
      x = ~`percent elderly`,
      y = ~elderly_trendline,
      type = 'scatter',
      mode = 'lines',
      line = list(size = 5, color = 'rgba(255, 165, 0, 1)'),
      frame = ~`Before or After Shelter-in-Place`,
      showlegend = F
    ) %>% 
  animation_button(visible = F) %>%
  layout(xaxis = list(title = '% elderly'), yaxis = list(title = '% leaving home'))

fig_elderly

Young population

fig_young <- 
  plot_ly (sj_dem_distancing_combined) %>%
    add_trace(
      x = ~`percent less than 30`, 
      y = ~`% leaving home`, 
      frame = ~`Before or After Shelter-in-Place`, 
      type = 'scatter', 
      mode = 'markers', 
      showlegend = F
    ) %>% 
    add_trace(
      x = ~`percent less than 30`,
      y = ~young_trendline,
      type = 'scatter',
      mode = 'lines',
      line = list(size = 5, color = 'rgba(255, 165, 0, 1)'),
      frame = ~`Before or After Shelter-in-Place`,
      showlegend = F
    ) %>% 
  animation_button(visible = F) %>%
  layout(xaxis = list(title = '% less than 30'), yaxis = list(title = '% leaving home'))

fig_young